//options
clear all
set maxvar  32000, permanently
set matsize 11000, permanently
set more off, permanently
global bootstraps 1000
set seed 23

// macros
global klmshare				: env klmshare
global projects				: env projects
global klmperry             : env klmperry
global storageb				: env storageb

global perrydatas       = "${klmperry}/CBA/data/perry/raw"
global perrydata        = "${klmperry}/CBA/data/perry/clean"
global masterinter  	= "${klmshare}/CurrentRAs/FredB/test"
global output           = "${projects}/ece_parenting/tex_files/other"
global dataperry        = "$klmperry/PerryPreschool/Data/Perry_PARI_and_Other_Data/PARI/DATA_PARI/"
global abcjpeanalysis   = "${klmshare}/Data_Central/Abecedarian/data/ABC-CARE/extensions/cba-iv/"
global dataihdp         = "${projects}/ece_parenting/data"
global dataihdp         = "$storageb/dc_data/"

cd  $dataihdp
use ihdp_data, clear
rename _all, lower

replace   bw = bw/1000

// impute iq
replace   stndscor =  iqcage     if stndscor ==.
replace   iqcage   =  stndscor   if iqcage ==.
foreach var of varlist stndscor iqcage { 
	replace `var' = mdi_24cor    if stndscor ==. & iqcage ==.
}
replace   stndscor =  iqcage     if stndscor ==.
replace   iqcage   =  stndscor   if iqcage ==.

destring bwg , replace 

// list relevant variables
global baseline_child      twin sex bw anga black hispanic 
global baseline_mother     mage meduc works married 
global baseline_household  welfare tot_siblings_natural employed_adult
global baseline_economy    employment medinc gpc
global outputs             iqcage stndscor 
global inputs              cum_avg_daycare_36m_sum

// mark all sample
reg $baseline_child
gen sample0 = e(sample)

// mark baseline sample
reg $baseline_child $baseline_mother $baseline_household $baseline_economy $outputs $inputs
gen sample1 = e(sample)

// parenting latents, ages 1 and 3
foreach var of varlist alt_subscale_1_12-alt_subscale_6_12 alt_subscale_1_36-alt_subscale_8_36 {
	summ `var'
	gen  `var'_std = (`var' - r(mean))/r(sd) 																	   if sample1 == 1
}
# delimit
sem (_cons@0 X -> alt_subscale_1_12_std) (_cons@0 X -> alt_subscale_2_12_std) (_cons@0 X -> alt_subscale_3_12_std) 
	(_cons@0 X -> alt_subscale_4_12_std) (_cons@0 X -> alt_subscale_5_12_std) (_cons@0 X -> alt_subscale_6_12_std) if sample1 == 1, method(adf);
predict parenting_age1 if sample1 == 1, latent;
sem (_cons@0 X -> alt_subscale_1_36_std) (_cons@0 X -> alt_subscale_2_36_std) (_cons@0 X -> alt_subscale_3_36_std) (_cons@0 X -> alt_subscale_4_36_std)
	(_cons@0 X -> alt_subscale_5_36_std) (_cons@0 X -> alt_subscale_6_36_std) (_cons@0 X -> alt_subscale_7_36_std) (_cons@0 X -> alt_subscale_8_36_std) if sample1 == 1, 
																																						 cov(e.alt_subscale_1_36_std*e.alt_subscale_2_36_std)
																																						 cov(e.alt_subscale_3_36_std*e.alt_subscale_4_36_std)
																																						 cov(e.alt_subscale_5_36_std*e.alt_subscale_6_36_std)
																																						 cov(e.alt_subscale_7_36_std*e.alt_subscale_8_36_std)
																																						 cov(e.alt_subscale_4_36_std*e.alt_subscale_7_36_std)
																																						 cov(e.alt_subscale_2_36_std*e.alt_subscale_7_36_std) method(adf);		
 predict parenting_age3 if sample1 == 1, latent;
# delimit cr
egen    parenting_ages13 = rowmean(parenting_age1 parenting_age3)

global inputs_std   cum_avg_daycare_36m_sum parenting_ages13
keep   ihdp site tg bwg sample* $baseline_child $baseline_mother $baseline_household $baseline_economy $inputs_std $outputs ppvtstd wasifsiq

// merge in ages 5 and 8
merge 1:1 ihdp using iqchildhoodihdp
keep  if _merge == 3
drop _merge

// impute iq
replace kidppvt5 = wippsif5 if kidppvt5 ==.
replace wippsif5 = kidppvt5 if wippsif5 ==.

replace ppvtstd8 = fsiq8    if ppvtstd8 ==.
replace fsiq8    = ppvtstd8 if fsiq8    ==.

replace ppvtstd  = wasifsiq if ppvtstd  ==.
replace wasifsiq = ppvtstd  if wasifsiq ==.

reg fsiq8 ppvtstd8 kidppvt5 wippsif5 wasifsiq ppvtstd
gen sample2 = e(sample) 

// outcomes
merge 1:1 ihdp using adultihdp
keep if _merge == 3
drop    _merge

// construct "positive" outcomes
replace idle18y         = 1 - idle18y
replace sch_eversped18y = 1 - sch_eversped18y
replace sch_math18y     = 1 - sch_math18y
replace sch_read18y     = 1 - sch_read18y
replace sch_thrp18y     = 1 - sch_thrp18y
replace teen18y         = 1 - teen18y

gen     smoke18 = .
replace smoke18 = 1 if cigs18y >  0  & cigs18y != .
replace smoke18 = 0 if cigs18y == 0 & cigs18y != .
replace smoke18 = 1 - smoke18

gen     absence18 = .
replace absence18 = 0 if sch_dabs18y < 5  & sch_dabs18y != .
replace absence18 = 1 if sch_dabs18y >= 5 & sch_dabs18y != .
replace absence18 = 1 - absence18 

// marking missing
global ed18  sch_eversped18y sch_math18y sch_read18y sch_test18y
global beh18 smoke18 idle18y sch_thrp18y teen18y
egen missing     = rowmiss($ed18 $beh18)

// average
egen avg_outcome_educ18 = rowmean($ed18)        if missing == 0
egen avg_outcome_beh18  = rowmean($beh18)       if missing == 0
egen avg_outcome18      = rowmean(avg_outcome_educ18 avg_outcome_beh18) if missing == 0

// define sample
reg     avg_outcome18 if sample2 == 1
replace sample2 = 0   if e(sample) == 0
replace sample2 = 0   if   sample1 == 0

// lists of outcomes
global iq3    iqcage stndscor
global iq5    kidppvt5 wippsif5
global iq8    ppvtstd8 fsiq8
global iq18   ppvtstd wasifsiq
global niq18  sch_eversped18y sch_read18y sch_math18y sch_test18y avg_outcome_educ18 smoke18 idle18y sch_thrp18y teen18y avg_outcome_beh18 avg_outcome18

rename tg treat
gen control = 1 - treat

// relevant sample
keep if sample2 == 1

// specs for run
gen ihdps = 1 if twin == 0
gen ihdpt = 1 if twin == 1

// strata
global ihdps_strata strata(bwg site treat)
global ihdpt_strata strata(bwg site treat)

// treatment effects
// programs
foreach prog in ihdps ihdpt {
	bootstrap, ${`prog'_strata} reps($bootstraps): reg avg_outcome18 treat if `prog' == 1
	matrix b = e(b)
	matrix V = e(V)
	matrix se`prog' = sqrt(V[1,1])
	matrix md`prog' = b[1,1]
	matrix  t`prog' = abs(md`prog'[1,1]/se`prog'[1,1])
	matrix df`prog' = e(N) - e(rank)
	matrix  p`prog' = 2*(1 - normal(t`prog'[1,1]))
	matrix  n`prog' = e(N)
	
	reg avg_outcome18 control treat if `prog' == 1, nocons
	matrix b = e(b)
	matrix bc = b[1,1]
	matrix bt = b[1,2]
	
	matrix `prog' = [bc,bt,md`prog',p`prog',n`prog']
}
matrix all_0 = [ihdps]
matrix all_1 = [ihdpt]


// sites 
gen      states = 1 if site == "ARK"
replace  states = 5 if site == "EIN"
replace  states = 4 if site == "HAR"
replace  states = 3 if site == "MIA"
replace  states = 6 if site == "PEN"
replace  states = 7 if site == "TEX"
replace  states = 8 if site == "WAS"
replace  states = 2 if site == "YAL"

levelsof states, local(sites)
foreach sib of numlist 0 1 {
	foreach state in `sites' {
		bootstrap, strata(bwg site treat) reps($bootstraps): reg avg_outcome18 treat if states == `state' & twin == `sib'
		matrix b = e(b)
		matrix V = e(V)
		matrix se = sqrt(V[1,1])
		matrix md = b[1,1]
		matrix  t = abs(md[1,1]/se[1,1])
		matrix df = e(N) - e(rank)
		matrix  p = 2*(1 - normal(t[1,1]))
		matrix  n = e(N)
		
		reg avg_outcome18 control treat if states == `state' & twin == `sib', nocons
		matrix b = e(b)
		matrix bc = b[1,1]
		matrix bt = b[1,2]
		
		matrix all_`sib' = [all_`sib' \ [bc,bt,md,p,n]]
	}
}

matrix colnames all_0 = bc_0 bt_0 md_0 p_0 n_0
matrix colnames all_1 = bc_1 bt_1 md_1 p_1 n_1
matrix all = [all_0,all_1]

clear
svmat all, names(col)
// labels

foreach sib of numlist 0 1 {
	tostring n_`sib', replace force format(%15.0fc)
	replace  n_`sib' = "N = " + n_`sib'
	tostring md_`sib', gen(md_`sib'str) force format(%15.2fc)
	replace  md_`sib'str = "{&Delta} = " + md_`sib'str
}
	

gen n = _n*2 - 2 + 4
gen ppt1 = .74
gen ppt2 = .73

// plots
global la_0 Singletons:
global la_1 Twins

foreach num of numlist 0 {
	#delimit
	twoway     (bar     bc_`num'  n, color(gs10) barwidth(.8)          yline(.45, lpattern(solid) lcolor(gs14)))
		   (bar     bt_`num'  n, fcolor(none) lcolor(gs0) barwidth(.8) xline(5  , lwidth(vthick) lpattern(solid) lcolor(gs10)))

		   (scatter ppt1  n, msymbol(none) mlabel(md_`num'str)    mlabposition(12) mlabcolor(gs0) mlabsize(vsmall))
		   (scatter ppt2  n, msymbol(none) mlabel(n_`num')        mlabposition(12) mlabcolor(gs0) mlabsize(vsmall))
		   
		   (scatter bt_`num'    n    if  p_`num' < 0.05                  , msize(small)  mcolor(gs0) msymbol(circle))
		   (scatter bt_`num'    n    if  p_`num' >= .05 & p_`num'    <= 0.10 , msize(small)  mcolor(gs0) msymbol(square))
		  

		   
		   ,
			  legend(label(1 "Control") label(2 "Treatment")
				 label(5 "{&Delta} {&ne} 0 ({it:p}-value < 0.05)")
				 label(6 "{&Delta} {&ne} 0 ({it:p}-value {&isin} [0.05,.10])")
				cols(2) rows(2) order(1 2 5 6) size(medsmall) position(6) region(lcolor(gs0)))
					 
			  ylabel(.45[.1].75, labsize(medium) angle(h) glcolor(gs14) glpattern(solid))  
			  xlabel(4 `" "${la_`num'}" "{bf:Pooled}" "'
				 6  "AR" 8  "CT" 10 "FL" 12 "MA" 13 `" " " "Singeltons: {bf:By Site}" "'
				 14 "NY" 16 "PA" 18 "TX" 20 "WA"
			  ,  labsize(small) grid glcolor(gs14) glpattern(solid))
			  ytitle("Unadjusted Mean", size(small))
			  xtitle("")
			  graphregion(color(white)) plotregion(fcolor(white)); 
	#delimit cr
	cd $output
	graph export TE_ncsbiq_by_site_s`num'.eps, replace
}
